#!/usr/bin/env python3
# I26 — Diagnostics-Leak Replay
#
# GOAL: Prove diagnostics are WRITE-ONLY. Changing diagnostic parameters (A→B) must NOT change:
#   • control decisions per tick (control trace),
#   • the set/timing of tie events,
#   • the number of control RNG calls,
#   • the sequence of control tie-resolutions for a fixed control seed.
#
# CONTROL (theory-faithful, minimal):
#   • Present-act, boolean/ordinal; no weights/potentials.
#   • Deterministic “eligibles” per tick come from fixed counters; ties occur at fixed period tie_every.
#   • At non-ties, choice is pure content-order (round-robin). At ties, use RNG *only here* with
#     Born-style weights derived from a primitive (strictly positive) kernel; RNG seeded by control_seed.
#
# DIAGNOSTICS (WRITE-ONLY):
#   • Two modes A and B with different parameters (e.g., smoothing window, faux "TCG/JFM" depths,
#     and independent diag RNG seeding). Diagnostics consume the control trace but NEVER feed back.
#   • We run Baseline(A), Variant(B), Replay(A again). Control must be identical for all runs with same control seed.
#
# ACCEPTANCE:
#   1) control_equal(A, B) = True  AND control_equal(A, A_replay) = True
#   2) rng_isolation_ok: control_rng_calls == tie_events; diagnostics make 0 control RNG calls
#   3) diag_changed: diag_hash(A) != diag_hash(B)  AND diag_hash(A) == diag_hash(A_replay)
#   4) optional: tie frequency under A equals A_replay; same seed -> identical tie picks; different control_seed -> different picks, same frequencies
#
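# OUTPUTS (all paths relative to --outdir):
#   - outputs/metrics/control_trace_*.csv (t, choice, tie)
#   - outputs/metrics/diag_summary_*.csv (per-label tcg/jfm rows plus a final diag_hash row)
#   - outputs/audits/i26_audit.json
#   - outputs/run_info/result_line.txt
#   - config/manifest_i26.json and logs/env.txt (copy of the manifest and run environment)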

import argparse, json, os, sys, csv, math, hashlib, random
from typing import List, Dict, Tuple

def utc_ts():
    """Return the current UTC time formatted as ``YYYY-MM-DDTHH-MM-SSZ``.

    The time fields are separated by hyphens rather than colons.
    """
    from datetime import datetime, timezone

    now_utc = datetime.now(tz=timezone.utc)
    return now_utc.strftime("%Y-%m-%dT%H-%M-%SZ")

def ensure_dirs(root, *subs):
    """Create every sub-directory in ``subs`` underneath ``root``.

    Intermediate directories are created as needed and already-existing
    directories are left untouched.
    """
    targets = (os.path.join(root, name) for name in subs)
    for target in targets:
        os.makedirs(target, exist_ok=True)

def write_text(path, txt):
    """Write ``txt`` to ``path`` as UTF-8, replacing any existing content."""
    sink = open(path, mode="w", encoding="utf-8")
    with sink:
        sink.write(txt)

def dump_json(path, obj):
    """Serialize ``obj`` to ``path`` as UTF-8 JSON with sorted keys and 2-space indent."""
    with open(path, mode="w", encoding="utf-8") as sink:
        json.dump(obj, sink, sort_keys=True, indent=2)

# ---------- utility hashing ----------
def sha256_bytes(b: bytes) -> str:
    """Return the hexadecimal SHA-256 digest of the byte string ``b``."""
    hasher = hashlib.sha256()
    hasher.update(b)
    return hasher.hexdigest()
def sha256_str(s: str) -> str:
    """Return the hexadecimal SHA-256 digest of ``s`` encoded as UTF-8."""
    encoded = s.encode("utf-8")
    return sha256_bytes(encoded)

# ---------- primitive kernel & Born weights ----------
def primitive_kernel(n:int, eta:float) -> List[List[float]]:
    """Build an ``n`` x ``n`` row-normalized kernel from a constant row of ``1 + eta``.

    Every row is the same template ``[1 + eta] * n`` divided by its own sum, so
    all entries within a row are equal. Each row is a distinct list object.

    Args:
        n: Matrix dimension; ``n == 0`` yields an empty list.
        eta: Offset added to the all-ones template before normalization.

    Returns:
        The kernel as a list of ``n`` rows, each a list of ``n`` floats.
    """
    template = [1.0 + eta] * n
    # The template never changes, so its sum is the same for every row.
    total = sum(template)
    return [[entry / total for entry in template] for _ in range(n)]

def pf_vector(M: List[List[float]], tol: float=1e-14, itmax: int=10000) -> List[float]:
    """Approximate the dominant left eigenvector of ``M`` by power iteration.

    Starting from the uniform vector, each step computes
    ``w[i] = sum_j M[j][i] * v[j]`` and rescales ``w`` to sum to one.

    Args:
        M: Square matrix given as a list of rows.
        tol: Stop once the L1 distance between successive iterates is below this.
        itmax: Maximum number of iterations.

    Returns:
        The converged iterate, or the most recent accepted iterate when the
        iteration budget runs out or a step produces an all-zero-sum vector.
    """
    size = len(M)
    current = [1.0 / size] * size
    for _ in range(itmax):
        candidate = []
        for col in range(size):
            # Explicit left-to-right accumulation of the column dot product.
            acc = 0.0
            for row, weight in enumerate(current):
                acc += M[row][col] * weight
            candidate.append(acc)
        total = sum(candidate)
        if total == 0:
            # Cannot normalize; fall back to the last accepted iterate.
            break
        candidate = [entry / total for entry in candidate]
        shift = sum(abs(new - old) for new, old in zip(candidate, current))
        if shift < tol:
            return candidate
        current = candidate
    return current

def born_weights(v: List[float], power: float) -> List[float]:
    """Turn ``v`` into a probability vector proportional to ``v[i] ** power``.

    Powered entries are clamped at zero from below. When the clamped entries
    do not have a positive sum, a uniform distribution over ``len(v)`` is
    returned instead. An empty ``v`` gives an empty list.
    """
    raised = [max(0.0, component ** power) for component in v]
    norm = sum(raised)
    if norm > 0:
        return [value / norm for value in raised]
    # Degenerate mass: spread probability evenly (no-op for an empty input).
    return [1.0 / len(raised) for _ in raised]

# ---------- present-act control with scheduled ties ----------
def run_control(labels: List[str], H:int, tie_every:int, ctrl_seed:int, eta:float, power:float) -> Dict[str,object]:
    """Simulate the control process for ``H`` ticks with scheduled ties.

    A tick ``t`` is a tie when ``tie_every > 0`` and ``t % tie_every == 0``
    (so tick 0 is a tie whenever ties are enabled). Non-tie ticks pick labels
    round-robin in the order given; the round-robin pointer only advances on
    non-tie ticks. Tie ticks draw exactly one number from an RNG seeded with
    ``ctrl_seed`` and sample a label from the Born weights derived from the
    primitive kernel.

    Args:
        labels: Candidate labels in content order; must be non-empty.
        H: Number of ticks to simulate.
        tie_every: Tie period; values ``<= 0`` disable ties.
        ctrl_seed: Integer seed for the control RNG.
        eta: Offset passed to ``primitive_kernel``.
        power: Exponent passed to ``born_weights``.

    Returns:
        Dict with keys ``M`` (kernel), ``v`` (power-iteration vector), ``p``
        (tie weights), ``seq`` (chosen label per tick), ``tie_mask`` (0/1 per
        tick), ``rng_calls`` (control RNG draws), ``ctrl_hash`` (SHA-256 over
        seed, tie mask, sequence and RNG call count) and ``tie_freq``
        (per-label share of tie picks, in ``labels`` order).

    Raises:
        ValueError: If ``labels`` is empty.
    """
    n = len(labels)
    if n == 0:
        # Previously this surfaced as an opaque ZeroDivisionError further down.
        raise ValueError("run_control requires at least one label")

    # Time-invariant tie weights derived from the control structure.
    M = primitive_kernel(n, eta)
    v = pf_vector(M)
    p = born_weights(v, power)

    rng = random.Random(ctrl_seed)
    rng_calls = 0
    rr_idx = 0  # round-robin pointer for non-tie ticks
    seq = []
    tie_mask = []
    for t in range(H):
        is_tie = tie_every > 0 and t % tie_every == 0
        tie_mask.append(1 if is_tie else 0)
        if is_tie:
            # The control RNG is consumed here and nowhere else.
            u = rng.random()
            rng_calls += 1
            # Inverse-CDF sampling. If float round-off leaves the cumulative
            # weight slightly below u, the draw belongs to the last bucket,
            # not the first.
            choice_idx = n - 1
            cumulative = 0.0
            for i, w in enumerate(p):
                cumulative += w
                if u <= cumulative:
                    choice_idx = i
                    break
            seq.append(labels[choice_idx])
        else:
            seq.append(labels[rr_idx])
            rr_idx = (rr_idx + 1) % n

    # Compact control digest: seed + tie mask + choice sequence + RNG call count.
    h = hashlib.sha256()
    h.update(("seed=%d" % ctrl_seed).encode())
    h.update(bytes(tie_mask))
    h.update("|".join(seq).encode())
    h.update(("rng_calls=%d" % rng_calls).encode())
    ctrl_hash = h.hexdigest()

    # Per-label share of the picks made at tie ticks.
    tie_total = sum(tie_mask)
    tie_counts = {lab: 0 for lab in labels}
    for chosen, flag in zip(seq, tie_mask):
        if flag:
            tie_counts[chosen] += 1
    tie_freq = [(tie_counts[lab] / tie_total if tie_total > 0 else 0.0) for lab in labels]

    return {
        "M": M, "v": v, "p": p, "seq": seq, "tie_mask": tie_mask,
        "rng_calls": rng_calls, "ctrl_hash": ctrl_hash, "tie_freq": tie_freq
    }

# ---------- diagnostics (WRITE-ONLY) ----------
def run_diagnostics(diag_cfg: Dict, seq: List[str], tie_mask: List[int]) -> Dict[str,object]:
    """Compute write-only diagnostic summaries of a control trace.

    The function only reads its inputs and uses a private RNG seeded from
    ``diag_cfg["diag_seed"]``; it never touches the control RNG.

    Args:
        diag_cfg: Mapping with integer-convertible keys ``diag_seed``,
            ``smooth_win`` (box-filter length, >= 1), ``tcg_depth``
            (finite-difference lag, >= 0) and ``jfm_win`` (probe length).
        seq: Chosen label per tick.
        tie_mask: Tie flag per tick. Accepted for interface symmetry; the
            current diagnostics do not read it.

    Returns:
        Dict with ``tcg`` and ``jfm`` (label -> float) and ``diag_hash``, the
        SHA-256 of a JSON digest of the window parameters and both summaries.

    Raises:
        ValueError: If ``smooth_win < 1`` or ``tcg_depth < 0``. These settings
            previously failed with ZeroDivisionError / IndexError on any
            non-empty trace.
    """
    rng = random.Random(int(diag_cfg["diag_seed"]))
    win = int(diag_cfg["smooth_win"])
    depth = int(diag_cfg["tcg_depth"])
    jwin = int(diag_cfg["jfm_win"])
    if win < 1:
        raise ValueError(f"smooth_win must be >= 1, got {win}")
    if depth < 0:
        raise ValueError(f"tcg_depth must be >= 0, got {depth}")

    labels = sorted(set(seq))
    # Indicator series per label: 1 where the label was chosen, else 0.
    indic = {lab: [1 if s == lab else 0 for s in seq] for lab in labels}

    # Causal box filter of length `win`; the first win-1 samples average over
    # however many samples are available so far.
    smoothed = {}
    for lab, arr in indic.items():
        sm = [0.0] * len(arr)
        acc = 0
        for t, x in enumerate(arr):
            acc += x
            if t >= win:
                acc -= arr[t - win]
            sm[t] = acc / float(min(t + 1, win))
        smoothed[lab] = sm

    # Faux TCG: mean absolute lag-`depth` difference of the smoothed signal.
    tcg = {}
    for lab, sm in smoothed.items():
        total = 0.0
        count = 0
        for t in range(depth, len(sm)):
            total += abs(sm[t] - sm[t - depth])
            count += 1
        tcg[lab] = (total / count if count > 0 else 0.0)

    # Faux JFM: mean inner product of each length-`jwin` window with a fixed
    # random probe. Probes are drawn in sorted-label order so that the result
    # is reproducible for a given diag_seed.
    probes = {lab: [rng.random() for _ in range(jwin)] for lab in labels}
    jfm = {}
    for lab, arr in indic.items():
        probe = probes[lab]
        total = 0.0
        count = 0
        for t in range(len(arr) - jwin):
            dot = sum(arr[t + k] * probe[k] for k in range(jwin))
            total += dot
            count += 1
        jfm[lab] = (total / count if count > 0 else 0.0)

    # Diagnostics hash over the window parameters and both summaries.
    digest = json.dumps({"win": win, "depth": depth, "jwin": jwin, "tcg": tcg, "jfm": jfm}, sort_keys=True)
    diag_hash = hashlib.sha256(digest.encode("utf-8")).hexdigest()
    return {"tcg": tcg, "jfm": jfm, "diag_hash": diag_hash}

# ---------- run an experiment ----------
def run_experiment(manifest: Dict, diag_cfg: Dict, tag: str, outdir: str) -> Dict[str,object]:
    """Run control plus diagnostics once and write both CSV artefacts for ``tag``.

    Control parameters are read from ``manifest`` (``labels``, ``H`` and the
    ``control`` section); ``diag_cfg`` is handed to the diagnostics unchanged.
    Two files are written under ``<outdir>/outputs/metrics``, which must
    already exist: ``control_trace_<tag>.csv`` and ``diag_summary_<tag>.csv``.

    Returns:
        ``{"ctrl": <run_control result>, "diag": <run_diagnostics result>}``.
    """
    labels = manifest["labels"]
    horizon = int(manifest["H"])
    control_cfg = manifest["control"]
    tie_period = int(control_cfg["tie_every"])
    seed = int(control_cfg["control_seed"])
    kernel_eta = float(control_cfg["kernel_eta"])
    born_power = float(control_cfg["born_power"])

    # Control first; diagnostics only ever see the finished trace.
    control = run_control(labels, horizon, tie_period, seed, kernel_eta, born_power)
    diag = run_diagnostics(diag_cfg, control["seq"], control["tie_mask"])

    # Control trace: one row per tick.
    trace_path = os.path.join(outdir, f"outputs/metrics/control_trace_{tag}.csv")
    with open(trace_path, "w", newline="", encoding="utf-8") as fh:
        writer = csv.writer(fh)
        writer.writerow(["t", "choice", "tie"])
        writer.writerows(
            [tick, choice, flag]
            for tick, (choice, flag) in enumerate(zip(control["seq"], control["tie_mask"]))
        )

    # Diagnostic summary: one row per label, then a trailing hash row.
    summary_path = os.path.join(outdir, f"outputs/metrics/diag_summary_{tag}.csv")
    with open(summary_path, "w", newline="", encoding="utf-8") as fh:
        writer = csv.writer(fh)
        writer.writerow(["label", "tcg", "jfm"])
        tcg = diag["tcg"]
        jfm = diag["jfm"]
        for lab in sorted(tcg):
            writer.writerow([lab, f"{tcg[lab]:.9f}", f"{jfm[lab]:.9f}"])
        writer.writerow(["diag_hash", diag["diag_hash"]])

    return {"ctrl": control, "diag": diag}

# ---------- full I26 run ----------
def run_i26(M: Dict, outdir: str) -> Dict[str,object]:
    """Run baseline A, variant B and a replay of A, then evaluate and persist the audit.

    The audit is written to ``<outdir>/outputs/audits/i26_audit.json`` and a
    one-line summary to ``<outdir>/outputs/run_info/result_line.txt``; the
    summary is also printed.

    Returns:
        The audit dictionary (inputs, hashes, flags, counts and ``passed``).
    """
    diag_cfgs = M["diagnostics"]
    # Order matters for the files written: A, then B, then A again as replay.
    baseline = run_experiment(M, diag_cfgs["A"], "A", outdir)
    variant = run_experiment(M, diag_cfgs["B"], "B", outdir)
    replay = run_experiment(M, diag_cfgs["A"], "A_replay", outdir)

    ctrl_a, ctrl_b, ctrl_r = baseline["ctrl"], variant["ctrl"], replay["ctrl"]
    diag_a, diag_b, diag_r = baseline["diag"], variant["diag"], replay["diag"]

    hashes = {
        "control_A": ctrl_a["ctrl_hash"],
        "control_B": ctrl_b["ctrl_hash"],
        "control_A_replay": ctrl_r["ctrl_hash"],
        "diag_A": diag_a["diag_hash"],
        "diag_B": diag_b["diag_hash"],
        "diag_A_replay": diag_r["diag_hash"],
    }
    counts = {
        "tie_events_A": sum(ctrl_a["tie_mask"]),
        "rng_calls_A": ctrl_a["rng_calls"],
        "tie_events_B": sum(ctrl_b["tie_mask"]),
        "rng_calls_B": ctrl_b["rng_calls"],
    }

    # Control must be identical regardless of the diagnostic configuration.
    eq_ab = hashes["control_A"] == hashes["control_B"]
    eq_aa2 = hashes["control_A"] == hashes["control_A_replay"]
    # RNG isolation: all four counts (tie events and RNG calls, A and B) agree.
    rng_ok = len(set(counts.values())) == 1
    # Diagnostics must react to their own knobs yet be reproducible.
    changed = hashes["diag_A"] != hashes["diag_B"]
    repro = hashes["diag_A"] == hashes["diag_A_replay"]
    # Tie frequencies of the baseline and its replay (same seed) must match.
    freq_eq = all(
        abs(x - y) < 1e-15
        for x, y in zip(ctrl_a["tie_freq"], ctrl_r["tie_freq"])
    )

    flags = {
        "control_equal_AB": eq_ab,
        "control_equal_AA2": eq_aa2,
        "rng_isolation_ok": rng_ok,
        "diag_changed": changed,
        "diag_repro": repro,
        "tie_freq_eq_A_vs_Areplay": freq_eq,
    }
    passed = all(flags.values())

    audit = {
        "sim": "I26_diag_leak_replay",
        "labels": M["labels"],
        "H": M["H"],
        "control": M["control"],
        "diagnostics": M["diagnostics"],
        "hashes": hashes,
        "flags": flags,
        "counts": counts,
        "passed": passed,
    }
    dump_json(os.path.join(outdir, "outputs/audits", "i26_audit.json"), audit)

    # One-line result for quick inspection.
    res = (f"I26 PASS={passed} ctrl_eq(A,B)={eq_ab} ctrl_eq(A,A2)={eq_aa2} "
           f"rng_iso={rng_ok} diag_changed={changed} diag_repro={repro}")
    write_text(os.path.join(outdir, "outputs/run_info", "result_line.txt"), res)
    print(res)
    return audit

def main(argv=None):
    """Command-line entry point: load the manifest, record the environment, run I26.

    Args:
        argv: Optional list of command-line arguments; when ``None`` the
            arguments are taken from ``sys.argv`` as before.

    Side effects:
        Creates the output directory layout under ``--outdir``, stores a copy
        of the manifest in ``config/manifest_i26.json``, writes
        ``logs/env.txt`` and then runs the full I26 experiment.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--manifest", required=True)
    ap.add_argument("--outdir", required=True)
    args = ap.parse_args(argv)
    ensure_dirs(args.outdir, "config", "outputs/metrics", "outputs/audits", "outputs/run_info", "logs")
    with open(args.manifest, "r", encoding="utf-8") as f:
        M = json.load(f)
    # Persist the manifest and a short environment record next to the results.
    dump_json(os.path.join(args.outdir, "config", "manifest_i26.json"), M)
    # Real newlines here: the previous "\\n" wrote a literal backslash-n,
    # collapsing env.txt onto a single line.
    write_text(os.path.join(args.outdir, "logs", "env.txt"),
               f"utc={utc_ts()}\nos={os.name}\npython={sys.version.split()[0]}\n")
    run_i26(M, args.outdir)

# Run the I26 experiment only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
